In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from bidi.algorithm import get_display
from arabic_reshaper import reshape

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from dimension_reduction import DimensionReduction

from sklearn.metrics import classification_report
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve
from sklearn.metrics import auc

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import to_categorical

import seaborn as sns
from collections import Counter
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection

from kneed import KneeLocator
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import accuracy_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics import log_loss

import warnings
warnings.filterwarnings("ignore")

Loading the dataset¶

In [6]:
df = pd.read_csv('data_spectral.csv')
df.head()
Out[6]:
0 1 2 3 4 5 6 7 8 9 ... 54 55 56 57 58 59 60 61 62 63
0 D_0 0.059704 0.087381 0.001719 -158.987503 -3.755985 -60.604965 6.924028 -40.192768 -8.211806 ... 21.546323 24.586190 55.200322 26.465070 13.116643 16.736227 16.670350 11.982480 17.727448 19.906522
1 D_0 0.064066 0.085583 0.001278 -158.987503 -3.755985 -60.604965 6.924028 -40.192768 -8.211806 ... 22.111645 24.544797 56.097532 23.586952 13.098344 16.962624 15.963867 12.388751 16.783902 23.318567
2 D_0 0.063933 0.079730 0.001191 -158.987503 -3.755985 -60.604965 6.924028 -40.192768 -8.211806 ... 22.057996 25.068478 55.802177 24.894988 13.754883 18.469141 18.749436 11.648222 19.297149 20.035435
3 D_0 0.059029 0.085087 0.001522 -158.987503 -3.755985 -60.604965 6.924028 -40.192768 -8.211806 ... 22.348724 25.147534 55.096469 30.595766 13.352426 19.142054 20.146716 14.319667 23.763115 22.128027
4 D_0 0.054941 0.083997 0.001787 -158.987503 -3.755985 -60.604965 6.924028 -40.192768 -8.211806 ... 21.124431 23.921915 53.962547 25.903111 12.602666 16.513078 16.167857 11.463181 16.514866 11.953743

5 rows × 64 columns

In [7]:
X = df.drop(["0"], axis=1)
y = df["0"]

Preprocess¶

Label Encoding¶

In [8]:
encoder = LabelEncoder()
y = encoder.fit_transform(y)

Train Test Split¶

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=31)
In [10]:
plt.bar(list(Counter(y_train).keys()), list(Counter(y_train).values()))
plt.xlabel('Dastgah')
plt.ylabel('Count')
plt.title('Dastgah Frequency of Train')
Out[10]:
Text(0.5, 1.0, 'Dastgah Frequency of Train')
In [11]:
plt.bar(list(Counter(y_test).keys()), list(Counter(y_test).values()))
plt.xlabel('Dastgah')
plt.ylabel('Count')
plt.title('Dastgah Frequency of Test')
Out[11]:
Text(0.5, 1.0, 'Dastgah Frequency of Test')

Normalization¶

In [12]:
scaler = StandardScaler()
X_test = scaler.fit_transform(np.array(X_test, dtype = float))
X_train = scaler.transform(np.array(X_train, dtype = float))

Classification¶

In [13]:
dastgah = {
    0: "شور",
    1: "سه‌گاه",
    2: "ماهور",
    3: "همایون",
    4: "راست پنج‌گاه",
    5: "نوا",
    6: "چهارگاه"
}
In [14]:
def evaluate(model):
    print("--------------------------------Train-------------------------------\n")
    print(classification_report(y_train, model.predict(X_train)))
    print("Error:", log_loss(y_train, model.predict_proba(X_train)))
    print("\n")
    print("--------------------------------Test--------------------------------\n")
    print(classification_report(y_test, model.predict(X_test)))
    print("Error:", log_loss(y_test, model.predict_proba(X_test)))
In [15]:
def plot_roc_curve(model, X, y, n_classes, title):
    y = label_binarize(y, classes = np.arange(n_classes))
    y_pred = label_binarize(model.predict(X), classes = np.arange(n_classes))

    for i in range(n_classes):
        fpr, tpr, thresholds = roc_curve(y[:, i], y_pred[:, i])
        roc_auc = auc(fpr, tpr)
        label = 'ROC of class {0} (area={1:0.2f})'.format(i, roc_auc)
        plt.plot(fpr, tpr, label = label)
        plt.plot([0, 1], [0, 1], linestyle = '--')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curve of ' + title)
        plt.legend(loc = 'lower right')

    plt.show()
In [16]:
def display_model_history(history):
  plt.figure(figsize=(10, 5)) 
  plt.plot(history.history['accuracy'], label = 'Train')
  plt.plot(history.history['val_accuracy'], label = 'Validation')
  plt.title('Model Accuracy')
  plt.xlabel('Epoch')
  plt.ylabel('Accuracy')
  plt.legend()
  plt.show()

  plt.figure(figsize=(10, 5)) 
  plt.plot(history.history['loss'], label = 'Train')
  plt.plot(history.history['val_loss'], label = 'Validation')
  plt.title('Model Loss')
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.legend()
  plt.show()
In [17]:
def categories_bar_plot(model, X, y, n_classes, title):
    y_pred = model.predict(X)
    results = pd.DataFrame()
    results['Dastgah'] = y
    results['Dastgah']=results['Dastgah'].apply(lambda x: get_display(reshape(dastgah[x])))
    results = pd.get_dummies(results['Dastgah'])
    results['Predicted'] = y_pred
    results['Predicted']=results['Predicted'].apply(lambda x: get_display(reshape(dastgah[x])))
    results = results.groupby(['Predicted']).sum() 
    fig, ax  = plt.subplots(1,1)
    results.plot.bar(ax = ax,rot=0,figsize=(10, 4))
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Predicted Category")
    ax.set_ylabel("Count")
    ax.set_title("Categories bar plot for "+title)
    plt.show()
In [18]:
def heatmap_plot(model, X, y, title):
    y_pred = model.predict(X)
    cm = confusion_matrix(y, y_pred)
    sortedlabels = [get_display(reshape(label)) for label in dastgah.values()]
    cm = pd.DataFrame(cm, index=[sortedlabels], columns=sortedlabels)
    plt.figure(figsize = (10,5))
    sns.heatmap(cm, linewidths=0.5, annot=True, cmap="Blues", fmt='g')
    plt.title("Confusiuon Matrix Heatmap for "+title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()

Support Vector Machine(SVM)¶

In [19]:
c = [1e-1, 1e0, 1e1]
gammas = [1e-1, 1e0, 1e1]
model = SVC(kernel = 'rbf', decision_function_shape = 'ovr')
params = dict(C = c, gamma = gammas)
svc = GridSearchCV(model, params, cv = 10)
svc.fit(X_train, y_train)
print('Best parameters for SVM:', svc.best_params_)
Best parameters for SVM: {'C': 10.0, 'gamma': 0.1}

Applying the best parameters¶

In [20]:
best_svc = SVC(
    kernel = 'rbf', 
    decision_function_shape = 'ovr', 
    C=svc.best_params_['C'], 
    gamma=svc.best_params_['gamma'],
    probability=True
)
best_svc.fit(X_train, y_train)
evaluate(best_svc)
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       598
           1       1.00      0.99      1.00       562
           2       0.99      0.99      0.99       633
           3       0.99      0.99      0.99       651
           4       0.99      0.99      0.99       513
           5       0.99      0.99      0.99       623
           6       1.00      0.99      1.00       629

    accuracy                           0.99      4209
   macro avg       0.99      0.99      0.99      4209
weighted avg       0.99      0.99      0.99      4209

Error: 0.08663110788142857


--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.74      0.84      0.79       192
           1       0.87      0.90      0.88       167
           2       0.84      0.83      0.84       221
           3       0.80      0.85      0.82       213
           4       0.83      0.75      0.79       212
           5       0.80      0.75      0.77       203
           6       0.85      0.81      0.83       195

    accuracy                           0.82      1403
   macro avg       0.82      0.82      0.82      1403
weighted avg       0.82      0.82      0.82      1403

Error: 0.5902545594060903
In [21]:
plot_roc_curve(svc, X_train, y_train, n_classes=7, title="Train")
In [22]:
plot_roc_curve(svc, X_test, y_test, n_classes = 7, title="Test")
In [23]:
categories_bar_plot(best_svc, X_train, y_train, n_classes=7, title="Train")
In [24]:
categories_bar_plot(best_svc, X_test, y_test, n_classes=7, title="Test")
In [25]:
heatmap_plot(best_svc, X_train, y_train, title="Train")
In [26]:
heatmap_plot(best_svc, X_test, y_test, title="Test")

K-Nearest Neighbors(KNN)¶

In [27]:
k_range = np.arange(2, 20)
params = dict(n_neighbors=k_range)
model = KNeighborsClassifier(weights='distance', metric='manhattan')
KNN = GridSearchCV(model, params, cv = 10)
KNN.fit(X_train, y_train)
print('Best K for KNN:', KNN.best_params_)
Best K for KNN: {'n_neighbors': 2}

Applying the best parameters¶

In [28]:
best_KNN = KNeighborsClassifier(
    n_neighbors=KNN.best_params_["n_neighbors"],
    weights='distance', 
    metric='manhattan'
)
best_KNN.fit(X_train, y_train)
evaluate(best_KNN)
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       598
           1       1.00      1.00      1.00       562
           2       1.00      1.00      1.00       633
           3       1.00      1.00      1.00       651
           4       0.99      1.00      0.99       513
           5       1.00      0.99      0.99       623
           6       1.00      1.00      1.00       629

    accuracy                           1.00      4209
   macro avg       1.00      1.00      1.00      4209
weighted avg       1.00      1.00      1.00      4209

Error: 0.0032936430532678803


--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.83      0.85      0.84       192
           1       0.88      0.93      0.90       167
           2       0.88      0.88      0.88       221
           3       0.85      0.86      0.85       213
           4       0.89      0.82      0.85       212
           5       0.82      0.84      0.83       203
           6       0.87      0.85      0.86       195

    accuracy                           0.86      1403
   macro avg       0.86      0.86      0.86      1403
weighted avg       0.86      0.86      0.86      1403

Error: 3.397213325835309
In [29]:
plot_roc_curve(KNN, X_train, y_train, n_classes = 7, title="Train")
In [30]:
plot_roc_curve(KNN, X_test, y_test, n_classes = 7, title="Test")
In [31]:
categories_bar_plot(best_KNN, X_train, y_train, n_classes=7, title="Train")
In [32]:
categories_bar_plot(best_KNN, X_test, y_test, n_classes=7, title="Test")
In [33]:
heatmap_plot(best_KNN, X_train, y_train, title="train")
In [34]:
heatmap_plot(best_KNN, X_test, y_test, title="test")

XGBoost¶

In [35]:
xgb = XGBClassifier(n_estimators=300)
xgb.fit(X_train, y_train)
Out[35]:
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)
In [36]:
evaluate(xgb)
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       598
           1       1.00      1.00      1.00       562
           2       1.00      1.00      1.00       633
           3       1.00      1.00      1.00       651
           4       0.99      0.99      0.99       513
           5       0.99      0.99      0.99       623
           6       1.00      1.00      1.00       629

    accuracy                           1.00      4209
   macro avg       1.00      1.00      1.00      4209
weighted avg       1.00      1.00      1.00      4209

Error: 0.008221420472188464


--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.73      0.78      0.75       192
           1       0.80      0.84      0.82       167
           2       0.84      0.77      0.80       221
           3       0.74      0.81      0.77       213
           4       0.81      0.68      0.74       212
           5       0.73      0.78      0.76       203
           6       0.75      0.76      0.76       195

    accuracy                           0.77      1403
   macro avg       0.77      0.77      0.77      1403
weighted avg       0.77      0.77      0.77      1403

Error: 0.7622170174145306
In [37]:
plot_roc_curve(xgb, X_train, y_train, n_classes = 7, title="Train")
In [38]:
plot_roc_curve(xgb, X_test, y_test, n_classes = 7, title="Test")
In [39]:
categories_bar_plot(xgb, X_train, y_train, n_classes=7, title="Train")
In [40]:
categories_bar_plot(xgb, X_test, y_test, n_classes=7, title="Test")

MLP¶

Applying the best parameters¶

In [41]:
best_MLP = MLPClassifier(hidden_layer_sizes = (128, 64, 32, 8), batch_size = 16, solver = 'sgd', random_state=4,
                    verbose=False, momentum=0.85, max_iter=400, learning_rate_init = 0.006)
best_MLP.fit(X_train, y_train)
Out[41]:
MLPClassifier(batch_size=16, hidden_layer_sizes=(128, 64, 32, 8),
              learning_rate_init=0.006, max_iter=400, momentum=0.85,
              random_state=4, solver='sgd')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MLPClassifier(batch_size=16, hidden_layer_sizes=(128, 64, 32, 8),
              learning_rate_init=0.006, max_iter=400, momentum=0.85,
              random_state=4, solver='sgd')
In [42]:
evaluate(best_MLP)
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       598
           1       1.00      1.00      1.00       562
           2       1.00      0.99      0.99       633
           3       0.99      1.00      0.99       651
           4       1.00      0.98      0.99       513
           5       0.98      1.00      0.99       623
           6       1.00      1.00      1.00       629

    accuracy                           0.99      4209
   macro avg       0.99      0.99      0.99      4209
weighted avg       0.99      0.99      0.99      4209

Error: 0.02525958885441375


--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.72      0.81      0.76       192
           1       0.81      0.80      0.80       167
           2       0.78      0.74      0.76       221
           3       0.75      0.81      0.78       213
           4       0.77      0.67      0.72       212
           5       0.69      0.73      0.71       203
           6       0.77      0.74      0.75       195

    accuracy                           0.75      1403
   macro avg       0.76      0.76      0.76      1403
weighted avg       0.76      0.75      0.75      1403

Error: 1.646954450207116
In [43]:
plot_roc_curve(best_MLP, X_train, y_train, n_classes = 7, title="Train")
In [44]:
plot_roc_curve(best_MLP, X_test, y_test, n_classes = 7, title="Test")
In [45]:
categories_bar_plot(best_MLP, X_train, y_train, n_classes=7, title="Train")
In [46]:
categories_bar_plot(best_MLP, X_test, y_test, n_classes=7, title="Test")
In [47]:
heatmap_plot(best_MLP, X_train, y_train, title="Train")
In [48]:
heatmap_plot(best_MLP, X_test, y_test, title="Test")

Logistic Regression¶

In [49]:
model = LogisticRegression()
params = [{'penalty':['l1','l2'], 'C':np.logspace(-4, 4, 12)}]
lr = GridSearchCV(model, params, cv = 10)
lr.fit(X_train, y_train)
print("Best parameters for Logistic Regression:(best parameters) ",lr.best_params_)
Best parameters for Logistic Regression:(best parameters)  {'C': 2.310129700083158, 'penalty': 'l2'}

Applying the best parameters¶

In [50]:
best_lr = LogisticRegression( C=lr.best_params_['C'], penalty=lr.best_params_['penalty'])
best_lr.fit(X_train, y_train)
evaluate(best_lr)
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       0.27      0.27      0.27       598
           1       0.27      0.22      0.24       562
           2       0.30      0.30      0.30       633
           3       0.27      0.35      0.31       651
           4       0.25      0.23      0.24       513
           5       0.31      0.35      0.33       623
           6       0.29      0.24      0.26       629

    accuracy                           0.28      4209
   macro avg       0.28      0.28      0.28      4209
weighted avg       0.28      0.28      0.28      4209

Error: 1.8260369383457316


--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.24      0.22      0.23       192
           1       0.19      0.19      0.19       167
           2       0.28      0.28      0.28       221
           3       0.26      0.38      0.31       213
           4       0.31      0.24      0.27       212
           5       0.32      0.33      0.33       203
           6       0.24      0.19      0.21       195

    accuracy                           0.26      1403
   macro avg       0.26      0.26      0.26      1403
weighted avg       0.27      0.26      0.26      1403

Error: 1.8581922051412463
In [51]:
plot_roc_curve(best_lr, X_train, y_train, n_classes = 7, title="Train")
In [52]:
plot_roc_curve(best_lr, X_test, y_test, n_classes = 7, title="Test")
In [53]:
categories_bar_plot(best_lr, X_train, y_train, n_classes=7, title="Train")
In [54]:
categories_bar_plot(best_lr, X_test, y_test, n_classes=7, title="Test")
In [55]:
heatmap_plot(best_lr, X_train, y_train, title="Train")
In [56]:
heatmap_plot(best_lr, X_test, y_test, title="Test")

LSTM¶

In [58]:
X_train_new, X_valid, y_train_new, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=31)
y_train_one_hot = to_categorical(y_train_new)
y_valid_one_hot = to_categorical(y_valid)

model = Sequential()
model.add(LSTM(units=128, dropout=0.1, recurrent_dropout=0.35, return_sequences=True, input_shape=(X_train_new.shape[1], 1)))
model.add(LSTM(units=64,  dropout=0.1, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=y_train_one_hot.shape[1], activation="softmax"))

print("Compiling ...")
model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics = ['accuracy'])
model.summary()

print("Training ...")
history = model.fit(X_train_new, y_train_one_hot, batch_size=64, epochs=80, validation_data = (X_valid, y_valid_one_hot))
Compiling ...
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm_2 (LSTM)               (None, 63, 128)           66560     
                                                                 
 lstm_3 (LSTM)               (None, 64)                49408     
                                                                 
 dense_1 (Dense)             (None, 7)                 455       
                                                                 
=================================================================
Total params: 116,423
Trainable params: 116,423
Non-trainable params: 0
_________________________________________________________________
Training ...
Epoch 1/80
60/60 [==============================] - 15s 156ms/step - loss: 1.9397 - accuracy: 0.1555 - val_loss: 1.9580 - val_accuracy: 0.1544
Epoch 2/80
60/60 [==============================] - 10s 158ms/step - loss: 1.9303 - accuracy: 0.1956 - val_loss: 1.9430 - val_accuracy: 0.1710
Epoch 3/80
60/60 [==============================] - 10s 161ms/step - loss: 1.9190 - accuracy: 0.2088 - val_loss: 1.9367 - val_accuracy: 0.2090
Epoch 4/80
60/60 [==============================] - 10s 172ms/step - loss: 1.9120 - accuracy: 0.2093 - val_loss: 1.9311 - val_accuracy: 0.1971
Epoch 5/80
60/60 [==============================] - 11s 187ms/step - loss: 1.9029 - accuracy: 0.2054 - val_loss: 1.9174 - val_accuracy: 0.2043
Epoch 6/80
60/60 [==============================] - 11s 177ms/step - loss: 1.8885 - accuracy: 0.2228 - val_loss: 1.9099 - val_accuracy: 0.2162
Epoch 7/80
60/60 [==============================] - 10s 164ms/step - loss: 1.8838 - accuracy: 0.2252 - val_loss: 1.9098 - val_accuracy: 0.2114
Epoch 8/80
60/60 [==============================] - 12s 192ms/step - loss: 1.8687 - accuracy: 0.2413 - val_loss: 1.8871 - val_accuracy: 0.2399
Epoch 9/80
60/60 [==============================] - 7s 123ms/step - loss: 1.8643 - accuracy: 0.2368 - val_loss: 1.8835 - val_accuracy: 0.2447
Epoch 10/80
60/60 [==============================] - 8s 134ms/step - loss: 1.8596 - accuracy: 0.2437 - val_loss: 1.8840 - val_accuracy: 0.2447
Epoch 11/80
60/60 [==============================] - 10s 161ms/step - loss: 1.8512 - accuracy: 0.2447 - val_loss: 1.8672 - val_accuracy: 0.2470
Epoch 12/80
60/60 [==============================] - 8s 127ms/step - loss: 1.8399 - accuracy: 0.2569 - val_loss: 1.8623 - val_accuracy: 0.2613
Epoch 13/80
60/60 [==============================] - 7s 117ms/step - loss: 1.8336 - accuracy: 0.2677 - val_loss: 1.8529 - val_accuracy: 0.2660
Epoch 14/80
60/60 [==============================] - 6s 106ms/step - loss: 1.8260 - accuracy: 0.2751 - val_loss: 1.8469 - val_accuracy: 0.2732
Epoch 15/80
60/60 [==============================] - 6s 107ms/step - loss: 1.8123 - accuracy: 0.2698 - val_loss: 1.8577 - val_accuracy: 0.2447
Epoch 16/80
60/60 [==============================] - 6s 103ms/step - loss: 1.8079 - accuracy: 0.2635 - val_loss: 1.8499 - val_accuracy: 0.2708
Epoch 17/80
60/60 [==============================] - 6s 103ms/step - loss: 1.8006 - accuracy: 0.2907 - val_loss: 1.8306 - val_accuracy: 0.2827
Epoch 18/80
60/60 [==============================] - 6s 103ms/step - loss: 1.7877 - accuracy: 0.2864 - val_loss: 1.8258 - val_accuracy: 0.2684
Epoch 19/80
60/60 [==============================] - 6s 105ms/step - loss: 1.7769 - accuracy: 0.2907 - val_loss: 1.8196 - val_accuracy: 0.2755
Epoch 20/80
60/60 [==============================] - 6s 104ms/step - loss: 1.7603 - accuracy: 0.3002 - val_loss: 1.7966 - val_accuracy: 0.2898
Epoch 21/80
60/60 [==============================] - 6s 107ms/step - loss: 1.7422 - accuracy: 0.3155 - val_loss: 1.7772 - val_accuracy: 0.2874
Epoch 22/80
60/60 [==============================] - 7s 109ms/step - loss: 1.7332 - accuracy: 0.3207 - val_loss: 1.7541 - val_accuracy: 0.3135
Epoch 23/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7183 - accuracy: 0.3271 - val_loss: 1.7553 - val_accuracy: 0.2850
Epoch 24/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7190 - accuracy: 0.3197 - val_loss: 1.7602 - val_accuracy: 0.3183
Epoch 25/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7103 - accuracy: 0.3329 - val_loss: 1.7595 - val_accuracy: 0.3135
Epoch 26/80
60/60 [==============================] - 6s 104ms/step - loss: 1.6849 - accuracy: 0.3390 - val_loss: 1.7331 - val_accuracy: 0.3349
Epoch 27/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6600 - accuracy: 0.3561 - val_loss: 1.7230 - val_accuracy: 0.3230
Epoch 28/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6667 - accuracy: 0.3519 - val_loss: 1.7014 - val_accuracy: 0.3610
Epoch 29/80
60/60 [==============================] - 6s 102ms/step - loss: 1.6469 - accuracy: 0.3551 - val_loss: 1.6658 - val_accuracy: 0.3682
Epoch 30/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6317 - accuracy: 0.3717 - val_loss: 1.6701 - val_accuracy: 0.3682
Epoch 31/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6160 - accuracy: 0.3812 - val_loss: 1.6836 - val_accuracy: 0.3444
Epoch 32/80
60/60 [==============================] - 6s 104ms/step - loss: 1.6031 - accuracy: 0.3928 - val_loss: 1.6641 - val_accuracy: 0.3705
Epoch 33/80
60/60 [==============================] - 6s 102ms/step - loss: 1.5778 - accuracy: 0.3838 - val_loss: 1.6439 - val_accuracy: 0.3705
Epoch 34/80
60/60 [==============================] - 6s 105ms/step - loss: 1.5812 - accuracy: 0.3997 - val_loss: 1.6293 - val_accuracy: 0.3634
Epoch 35/80
60/60 [==============================] - 6s 105ms/step - loss: 1.5575 - accuracy: 0.4124 - val_loss: 1.6441 - val_accuracy: 0.3634
Epoch 36/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5316 - accuracy: 0.4232 - val_loss: 1.6098 - val_accuracy: 0.4086
Epoch 37/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5330 - accuracy: 0.4240 - val_loss: 1.5827 - val_accuracy: 0.4181
Epoch 38/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5223 - accuracy: 0.4248 - val_loss: 1.6219 - val_accuracy: 0.3895
Epoch 39/80
60/60 [==============================] - 6s 102ms/step - loss: 1.5029 - accuracy: 0.4322 - val_loss: 1.5724 - val_accuracy: 0.4276
Epoch 40/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4931 - accuracy: 0.4488 - val_loss: 1.5608 - val_accuracy: 0.3990
Epoch 41/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4783 - accuracy: 0.4501 - val_loss: 1.5918 - val_accuracy: 0.3872
Epoch 42/80
60/60 [==============================] - 6s 102ms/step - loss: 1.4660 - accuracy: 0.4472 - val_loss: 1.5513 - val_accuracy: 0.4181
Epoch 43/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4480 - accuracy: 0.4575 - val_loss: 1.5449 - val_accuracy: 0.4276
Epoch 44/80
60/60 [==============================] - 6s 104ms/step - loss: 1.4390 - accuracy: 0.4636 - val_loss: 1.5481 - val_accuracy: 0.4252
Epoch 45/80
60/60 [==============================] - 6s 105ms/step - loss: 1.4186 - accuracy: 0.4815 - val_loss: 1.5357 - val_accuracy: 0.4418
Epoch 46/80
60/60 [==============================] - 6s 104ms/step - loss: 1.4125 - accuracy: 0.4768 - val_loss: 1.5050 - val_accuracy: 0.4466
Epoch 47/80
60/60 [==============================] - 6s 107ms/step - loss: 1.3947 - accuracy: 0.4836 - val_loss: 1.5167 - val_accuracy: 0.4442
Epoch 48/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3990 - accuracy: 0.4781 - val_loss: 1.5101 - val_accuracy: 0.4323
Epoch 49/80
60/60 [==============================] - 6s 102ms/step - loss: 1.3632 - accuracy: 0.5008 - val_loss: 1.5066 - val_accuracy: 0.4584
Epoch 50/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3793 - accuracy: 0.4865 - val_loss: 1.5062 - val_accuracy: 0.4418
Epoch 51/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3494 - accuracy: 0.5135 - val_loss: 1.5027 - val_accuracy: 0.4347
Epoch 52/80
60/60 [==============================] - 7s 109ms/step - loss: 1.3354 - accuracy: 0.5140 - val_loss: 1.4770 - val_accuracy: 0.4632
Epoch 53/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3242 - accuracy: 0.5182 - val_loss: 1.4852 - val_accuracy: 0.4513
Epoch 54/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3087 - accuracy: 0.5238 - val_loss: 1.4786 - val_accuracy: 0.4394
Epoch 55/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3072 - accuracy: 0.5230 - val_loss: 1.4385 - val_accuracy: 0.4703
Epoch 56/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2965 - accuracy: 0.5267 - val_loss: 1.4661 - val_accuracy: 0.4632
Epoch 57/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2834 - accuracy: 0.5362 - val_loss: 1.4438 - val_accuracy: 0.4703
Epoch 58/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2562 - accuracy: 0.5372 - val_loss: 1.4640 - val_accuracy: 0.4846
Epoch 59/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2536 - accuracy: 0.5544 - val_loss: 1.4594 - val_accuracy: 0.4774
Epoch 60/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2214 - accuracy: 0.5576 - val_loss: 1.4059 - val_accuracy: 0.4988
Epoch 61/80
60/60 [==============================] - 6s 101ms/step - loss: 1.2269 - accuracy: 0.5626 - val_loss: 1.4477 - val_accuracy: 0.4703
Epoch 62/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2292 - accuracy: 0.5494 - val_loss: 1.4326 - val_accuracy: 0.5107
Epoch 63/80
60/60 [==============================] - 6s 103ms/step - loss: 1.2005 - accuracy: 0.5641 - val_loss: 1.4012 - val_accuracy: 0.4941
Epoch 64/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2024 - accuracy: 0.5694 - val_loss: 1.4156 - val_accuracy: 0.4917
Epoch 65/80
60/60 [==============================] - 6s 102ms/step - loss: 1.1996 - accuracy: 0.5694 - val_loss: 1.4263 - val_accuracy: 0.5083
Epoch 66/80
60/60 [==============================] - 7s 109ms/step - loss: 1.1822 - accuracy: 0.5726 - val_loss: 1.4166 - val_accuracy: 0.4941
Epoch 67/80
60/60 [==============================] - 7s 112ms/step - loss: 1.1679 - accuracy: 0.5829 - val_loss: 1.3963 - val_accuracy: 0.5012
Epoch 68/80
60/60 [==============================] - 7s 122ms/step - loss: 1.1540 - accuracy: 0.5884 - val_loss: 1.4146 - val_accuracy: 0.5321
Epoch 69/80
60/60 [==============================] - 7s 118ms/step - loss: 1.1224 - accuracy: 0.6043 - val_loss: 1.3967 - val_accuracy: 0.5321
Epoch 70/80
60/60 [==============================] - 7s 110ms/step - loss: 1.1349 - accuracy: 0.5987 - val_loss: 1.4060 - val_accuracy: 0.5107
Epoch 71/80
60/60 [==============================] - 7s 111ms/step - loss: 1.1110 - accuracy: 0.6088 - val_loss: 1.3867 - val_accuracy: 0.5321
Epoch 72/80
60/60 [==============================] - 8s 126ms/step - loss: 1.1203 - accuracy: 0.6043 - val_loss: 1.4027 - val_accuracy: 0.5297
Epoch 73/80
60/60 [==============================] - 7s 111ms/step - loss: 1.1215 - accuracy: 0.5971 - val_loss: 1.3838 - val_accuracy: 0.5344
Epoch 74/80
60/60 [==============================] - 7s 124ms/step - loss: 1.1204 - accuracy: 0.5979 - val_loss: 1.3889 - val_accuracy: 0.5297
Epoch 75/80
60/60 [==============================] - 7s 115ms/step - loss: 1.0938 - accuracy: 0.6125 - val_loss: 1.3784 - val_accuracy: 0.5249
Epoch 76/80
60/60 [==============================] - 6s 106ms/step - loss: 1.0764 - accuracy: 0.6169 - val_loss: 1.3697 - val_accuracy: 0.5321
Epoch 77/80
60/60 [==============================] - 8s 131ms/step - loss: 1.0525 - accuracy: 0.6257 - val_loss: 1.4134 - val_accuracy: 0.5392
Epoch 78/80
60/60 [==============================] - 7s 108ms/step - loss: 1.0696 - accuracy: 0.6185 - val_loss: 1.3743 - val_accuracy: 0.5273
Epoch 79/80
60/60 [==============================] - 8s 131ms/step - loss: 1.0389 - accuracy: 0.6341 - val_loss: 1.3984 - val_accuracy: 0.5344
Epoch 80/80
60/60 [==============================] - 7s 108ms/step - loss: 1.0412 - accuracy: 0.6333 - val_loss: 1.3622 - val_accuracy: 0.5511
In [59]:
display_model_history(history)
In [77]:
y_pred = np.argmax(model.predict(X_train), axis = 1)
print("--------------------------------Train-------------------------------\n")
print(classification_report(y_train, y_pred))
132/132 [==============================] - 3s 22ms/step
--------------------------------Train-------------------------------

              precision    recall  f1-score   support

           0       0.78      0.78      0.78       598
           1       0.75      0.78      0.77       562
           2       0.76      0.78      0.77       633
           3       0.73      0.72      0.73       651
           4       0.81      0.70      0.75       513
           5       0.76      0.78      0.77       623
           6       0.77      0.80      0.79       629

    accuracy                           0.76      4209
   macro avg       0.77      0.76      0.76      4209
weighted avg       0.76      0.76      0.76      4209

In [76]:
y_pred = np.argmax(model.predict(X_test), axis = 1)
print("--------------------------------Test--------------------------------\n")
print(classification_report(y_test, y_pred))
44/44 [==============================] - 1s 18ms/step
--------------------------------Test--------------------------------

              precision    recall  f1-score   support

           0       0.55      0.56      0.55       192
           1       0.59      0.67      0.63       167
           2       0.65      0.65      0.65       221
           3       0.55      0.58      0.56       213
           4       0.66      0.44      0.53       212
           5       0.51      0.58      0.54       203
           6       0.58      0.58      0.58       195

    accuracy                           0.58      1403
   macro avg       0.58      0.58      0.58      1403
weighted avg       0.58      0.58      0.58      1403

Feature Selection¶

Linear Discriminant Analysis(LDA)¶

In [62]:
dr = DimensionReduction(X_train, y_train, X_test, y_test)
X_train_lda, X_test_lda = dr.LDA(n_components=2)

Clustring¶

Determining the ideal number of clusters:

In [63]:
def calculate_erros(data, kmax):
    wse = []
    for k in range(1, kmax+1):
        err = 0
        kmeans = KMeans(n_clusters=k, init='k-means++', algorithm='elkan', max_iter=100).fit(data)
        pred_clusters = kmeans.predict(data)
        for i in range(len(data)):
            center = kmeans.cluster_centers_[pred_clusters[i]]
            err += (data[i, 0] - center[0]) ** 2 + (data[i, 1] -center[1]) ** 2
        wse.append(err)
    return wse

list_of_wss = calculate_erros(X_train_lda, 20)
optimal_k = KneeLocator([i for i in range(1, 21)], list_of_wss, curve='convex', direction='decreasing').knee
print("Optimal number of clusters: ", optimal_k)
plt.plot(range(1, 21), list_of_wss, marker='o',  linestyle='--')
plt.xlabel('number of clusters')
plt.ylabel('Within Cluster Sum of Squares (WCSS)')
plt.xticks(range(1, 21))
plt.show()
Optimal number of clusters:  6
In [64]:
def plot_dendrogram(train_data,test_data,num_of_features):
    fig, axes = plt.subplots(ncols=2, nrows=1,figsize=(25, 10))
    axes[0].set_title('Dendogram for Train Data with '+ str(num_of_features) +' features')
    clusters = linkage(train_data, method='ward')
    dendrogram(clusters,truncate_mode='lastp',show_contracted=True,ax=axes[0],no_labels=True)
    axes[1].set_title('Dendogram for Test Data with '+ str(num_of_features) +' features')
    clusters = linkage(test_data, method='ward')
    dendrogram(clusters,truncate_mode='lastp',show_contracted=True,ax=axes[1],no_labels=True)
    plt.show()

Clustring Using Data with 2 Features¶

In [65]:
def evaluate_clustering(model, number_of_clusters, model_name, d=2):
    if d == 2:
        fig, ax = plt.subplots(1, 2, figsize=(12, 6))
        ax[0].scatter(X_train_lda[:, 0], X_train_lda[:, 1], c=model.fit_predict(X_train))
        ax[0].set_title(f"{model_name} Train\nNumber of Clusters: {number_of_clusters}\nFowlkes Mallow Score: {fowlkes_mallows_score(y_train, model.fit_predict(X_train))}")

        ax[1].scatter(X_test_lda[:, 0], X_test_lda[:, 1], c=model.fit_predict(X_test))
        ax[1].set_title(f"{model_name} Test\nNumber of Clusters: {number_of_clusters}\nFowlkes Mallow Score: {fowlkes_mallows_score(y_test, model.fit_predict(X_test))}")
    
    
    if d == 3:
        fig, ax = plt.subplots(1, 2, figsize=(12, 9), subplot_kw=dict(projection='3d'))
        
        ax[0].scatter(X_train_lda[:, 0], X_train_lda[:, 1], X_train_lda[:, 2], c=model.fit_predict(X_train))
        ax[0].set_title(f"{model_name} Train\nNumber of Clusters: {number_of_clusters}\nFowlkes Mallows Score: {fowlkes_mallows_score(y_train, model.fit_predict(X_train))}")

        ax[1].scatter(X_test_lda[:, 0], X_test_lda[:, 1], X_test_lda[:, 2], c=model.fit_predict(X_test))
        ax[1].set_title(f"{model_name} Test\nNumber of Clusters: {number_of_clusters}\nFowlkes Mallow Score: {fowlkes_mallows_score(y_test, model.fit_predict(X_test))}")
    fig.tight_layout()
    plt.show()

K-Means¶

In [66]:
clusters = [2, 7, 20]
In [67]:
for n_clusters in clusters:
    kMeans = KMeans(n_clusters=n_clusters, init='k-means++', algorithm='elkan', max_iter=100)
    kMeans.fit(X_train_lda)
    evaluate_clustering(kMeans, number_of_clusters=n_clusters, model_name="K-Means")
    plt.show()

Agglomerative¶

In [68]:
for n_clusters in clusters:
    agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
    agglomerative.fit(X_train_lda)
    evaluate_clustering(agglomerative, number_of_clusters=n_clusters, model_name="Agglomerative")
    plt.show()
In [69]:
plot_dendrogram( X_train_lda,X_test_lda,num_of_features=2)

Clustring Using Data with 3 Features¶

In [70]:
dr = DimensionReduction(X_train, y_train, X_test, y_test)
X_train_lda, X_test_lda = dr.LDA(n_components=3)

K-Means¶

In [71]:
for n_clusters in clusters:
    kMeans = KMeans(n_clusters=n_clusters, init='k-means++', algorithm='elkan', max_iter=100)
    kMeans.fit(X_train_lda)
    evaluate_clustering(kMeans, number_of_clusters=n_clusters, model_name="K-Means", d=3)
    plt.show()
In [72]:
for n_clusters in clusters:
    agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
    agglomerative.fit(X_train_lda)
    evaluate_clustering(agglomerative, number_of_clusters=n_clusters, model_name="Agglomerative", d=3)
    plt.show()
In [73]:
plot_dendrogram( X_train_lda,X_test_lda,num_of_features=3)
In [ ]: